{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('2017版(含附录)国民经济行业分类(GBT4754—2017).csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>门类</th>\n",
       "      <th>大类</th>\n",
       "      <th>中类</th>\n",
       "      <th>小类</th>\n",
       "      <th>代码</th>\n",
       "      <th>说明</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>农、林、牧、渔业</td>\n",
       "      <td>本门类包括01～05大类</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>农业</td>\n",
       "      <td>指对各种农作物的种植</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>11.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>谷物种植</td>\n",
       "      <td>指以收获籽实为主的农作物的种植，包括稻谷、小麦、玉米等农作物的种植和作为饲料和工业原料的...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>111.0</td>\n",
       "      <td>稻谷种植</td>\n",
       "      <td>稻谷种植</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>112.0</td>\n",
       "      <td>小麦种植</td>\n",
       "      <td>小麦种植</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    门类   大类    中类     小类        代码  \\\n",
       "0    A  NaN   NaN    NaN  农、林、牧、渔业   \n",
       "1  NaN  1.0   NaN    NaN        农业   \n",
       "2  NaN  NaN  11.0    NaN      谷物种植   \n",
       "3  NaN  NaN   NaN  111.0      稻谷种植   \n",
       "4  NaN  NaN   NaN  112.0      小麦种植   \n",
       "\n",
       "                                                  说明  \n",
       "0                                       本门类包括01～05大类  \n",
       "1                                         指对各种农作物的种植  \n",
       "2    指以收获籽实为主的农作物的种植，包括稻谷、小麦、玉米等农作物的种植和作为饲料和工业原料的...  \n",
       "3                                               稻谷种植  \n",
       "4                                               小麦种植  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_fill= df.fillna(method='ffill')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>门类</th>\n",
       "      <th>大类</th>\n",
       "      <th>中类</th>\n",
       "      <th>小类</th>\n",
       "      <th>代码</th>\n",
       "      <th>说明</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>农、林、牧、渔业</td>\n",
       "      <td>本门类包括01～05大类</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>农业</td>\n",
       "      <td>指对各种农作物的种植</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A</td>\n",
       "      <td>1.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>谷物种植</td>\n",
       "      <td>指以收获籽实为主的农作物的种植，包括稻谷、小麦、玉米等农作物的种植和作为饲料和工业原料的...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A</td>\n",
       "      <td>1.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>111.0</td>\n",
       "      <td>稻谷种植</td>\n",
       "      <td>稻谷种植</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>A</td>\n",
       "      <td>1.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>112.0</td>\n",
       "      <td>小麦种植</td>\n",
       "      <td>小麦种植</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  门类   大类    中类     小类        代码  \\\n",
       "0  A  NaN   NaN    NaN  农、林、牧、渔业   \n",
       "1  A  1.0   NaN    NaN        农业   \n",
       "2  A  1.0  11.0    NaN      谷物种植   \n",
       "3  A  1.0  11.0  111.0      稻谷种植   \n",
       "4  A  1.0  11.0  112.0      小麦种植   \n",
       "\n",
       "                                                  说明  \n",
       "0                                       本门类包括01～05大类  \n",
       "1                                         指对各种农作物的种植  \n",
       "2    指以收获籽实为主的农作物的种植，包括稻谷、小麦、玉米等农作物的种植和作为饲料和工业原料的...  \n",
       "3                                               稻谷种植  \n",
       "4                                               小麦种植  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_fill.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>门类</th>\n",
       "      <th>大类</th>\n",
       "      <th>中类</th>\n",
       "      <th>小类</th>\n",
       "      <th>代码</th>\n",
       "      <th>说明</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1768</th>\n",
       "      <td>S</td>\n",
       "      <td>96.0</td>\n",
       "      <td>961.0</td>\n",
       "      <td>9610.0</td>\n",
       "      <td>社区居民自治组织</td>\n",
       "      <td>指城市、镇的居民通过选举产生的群众性自治组织的管理活动</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1769</th>\n",
       "      <td>S</td>\n",
       "      <td>96.0</td>\n",
       "      <td>962.0</td>\n",
       "      <td>9620.0</td>\n",
       "      <td>村民自治组织</td>\n",
       "      <td>指农村村民通过选举产生的群众性自治组织的管理活动</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1770</th>\n",
       "      <td>T</td>\n",
       "      <td>96.0</td>\n",
       "      <td>962.0</td>\n",
       "      <td>9620.0</td>\n",
       "      <td>国际组织</td>\n",
       "      <td>本门类包括97大类</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1771</th>\n",
       "      <td>T</td>\n",
       "      <td>97.0</td>\n",
       "      <td>962.0</td>\n",
       "      <td>9620.0</td>\n",
       "      <td>国际组织</td>\n",
       "      <td>国际组织</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1772</th>\n",
       "      <td>T</td>\n",
       "      <td>97.0</td>\n",
       "      <td>970.0</td>\n",
       "      <td>9700.0</td>\n",
       "      <td>国际组织</td>\n",
       "      <td>指联合国和其他国际组织驻我国境内机构等活动</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     门类    大类     中类      小类          代码                             说明\n",
       "1768  S  96.0  961.0  9610.0    社区居民自治组织    指城市、镇的居民通过选举产生的群众性自治组织的管理活动\n",
       "1769  S  96.0  962.0  9620.0      村民自治组织       指农村村民通过选举产生的群众性自治组织的管理活动\n",
       "1770  T  96.0  962.0  9620.0        国际组织                      本门类包括97大类\n",
       "1771  T  97.0  962.0  9620.0        国际组织                           国际组织\n",
       "1772  T  97.0  970.0  9700.0        国际组织          指联合国和其他国际组织驻我国境内机构等活动"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_fill.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(?=.*光伏)(?=.*太阳能)\n"
     ]
    }
   ],
   "source": [
    "key_word = ['光伏','太阳能']\n",
    "str = ''\n",
    "for i in key_word:\n",
    "    str += '(?=.*{})'.format(i)\n",
    "print(str)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>门类</th>\n",
       "      <th>大类</th>\n",
       "      <th>中类</th>\n",
       "      <th>小类</th>\n",
       "      <th>代码</th>\n",
       "      <th>说明</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [门类, 大类, 中类, 小类, 代码, 说明]\n",
       "Index: []"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_fill[df_fill['说明'].str.contains(str)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>门类</th>\n",
       "      <th>大类</th>\n",
       "      <th>中类</th>\n",
       "      <th>小类</th>\n",
       "      <th>代码</th>\n",
       "      <th>说明</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>708</th>\n",
       "      <td>C</td>\n",
       "      <td>35.0</td>\n",
       "      <td>356.0</td>\n",
       "      <td>3562.0</td>\n",
       "      <td>半导体器件专用设备制造</td>\n",
       "      <td>指生产集成电路、二极管（含发光二极管）、三极管、太阳能电池片的设备的制造</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>796</th>\n",
       "      <td>C</td>\n",
       "      <td>38.0</td>\n",
       "      <td>382.0</td>\n",
       "      <td>3825.0</td>\n",
       "      <td>光伏设备及元器件制造</td>\n",
       "      <td>指太阳能组件（太阳能电池）、控制设备及其他太阳能设备和元器件制造；不包括太阳能用蓄电池制造</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>804</th>\n",
       "      <td>C</td>\n",
       "      <td>38.0</td>\n",
       "      <td>384.0</td>\n",
       "      <td>3839.0</td>\n",
       "      <td>电池制造</td>\n",
       "      <td>指以正极活性材料、负极活性材料，配合电介质，以密封式结构制成的，并具有一定公称电压和额定...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>821</th>\n",
       "      <td>C</td>\n",
       "      <td>38.0</td>\n",
       "      <td>386.0</td>\n",
       "      <td>3862.0</td>\n",
       "      <td>太阳能器具制造</td>\n",
       "      <td>太阳能器具制造</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>928</th>\n",
       "      <td>D</td>\n",
       "      <td>44.0</td>\n",
       "      <td>441.0</td>\n",
       "      <td>4416.0</td>\n",
       "      <td>太阳能发电</td>\n",
       "      <td>太阳能发电</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>980</th>\n",
       "      <td>E</td>\n",
       "      <td>48.0</td>\n",
       "      <td>487.0</td>\n",
       "      <td>4875.0</td>\n",
       "      <td>太阳能发电工程施工</td>\n",
       "      <td>太阳能发电工程施工</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    门类    大类     中类      小类               代码  \\\n",
       "708  C  35.0  356.0  3562.0      半导体器件专用设备制造   \n",
       "796  C  38.0  382.0  3825.0       光伏设备及元器件制造   \n",
       "804  C  38.0  384.0  3839.0             电池制造   \n",
       "821  C  38.0  386.0  3862.0          太阳能器具制造   \n",
       "928  D  44.0  441.0  4416.0            太阳能发电   \n",
       "980  E  48.0  487.0  4875.0        太阳能发电工程施工   \n",
       "\n",
       "                                                    说明  \n",
       "708               指生产集成电路、二极管（含发光二极管）、三极管、太阳能电池片的设备的制造  \n",
       "796      指太阳能组件（太阳能电池）、控制设备及其他太阳能设备和元器件制造；不包括太阳能用蓄电池制造  \n",
       "804    指以正极活性材料、负极活性材料，配合电介质，以密封式结构制成的，并具有一定公称电压和额定...  \n",
       "821                                            太阳能器具制造  \n",
       "928                                              太阳能发电  \n",
       "980                                          太阳能发电工程施工  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_fill[df_fill['说明'].str.contains('光伏|太阳能')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
